home *** CD-ROM | disk | FTP | other *** search
/ Enter 2006 September / Enter 09 2006.iso / Internet / SpamExperts Home 1.1 / SpamExperts Home.exe / lib / spamexperts.modules / spambayes / msgs.pyc (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2006-07-14  |  4.0 KB  |  130 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.4)
  3.  
  4. from __future__ import generators
  5. import os
  6. import random
  7. from spambayes.tokenizer import tokenize
  8. HAMTEST = None
  9. SPAMTEST = None
  10. HAMTRAIN = None
  11. SPAMTRAIN = None
  12. SEED = random.randrange(2000000000)
  13.  
  14. class Msg(object):
  15.     __slots__ = ('tag', 'guts')
  16.     
  17.     def __init__(self, dir, name):
  18.         path = dir + '/' + name
  19.         self.tag = path
  20.         f = open(path, 'rb')
  21.         self.guts = f.read()
  22.         f.close()
  23.  
  24.     
  25.     def __iter__(self):
  26.         return tokenize(self.guts)
  27.  
  28.     
  29.     def __hash__(self):
  30.         return hash(self.tag)
  31.  
  32.     
  33.     def __eq__(self, other):
  34.         return self.tag == other.tag
  35.  
  36.     
  37.     def __str__(self):
  38.         return self.guts
  39.  
  40.     
  41.     def __getstate__(self):
  42.         return (self.tag, self.guts)
  43.  
  44.     
  45.     def __setstate__(self, s):
  46.         (self.tag, self.guts) = s
  47.  
  48.  
  49.  
  50. class MsgStream(object):
  51.     __slots__ = ('tag', 'directories', 'keep')
  52.     
  53.     def __init__(self, tag, directories, keep = None):
  54.         self.tag = tag
  55.         self.directories = directories
  56.         self.keep = keep
  57.  
  58.     
  59.     def __str__(self):
  60.         return self.tag
  61.  
  62.     
  63.     def produce(self):
  64.         if self.keep is None:
  65.             for directory in self.directories:
  66.                 for fname in os.listdir(directory):
  67.                     yield Msg(directory, fname)
  68.                 
  69.             
  70.             return None
  71.         
  72.         for directory in self.directories:
  73.             all = os.listdir(directory)
  74.             random.seed(hash(max(all)) ^ SEED)
  75.             random.shuffle(all)
  76.             del all[self.keep:]
  77.             all.sort()
  78.             for fname in all:
  79.                 yield Msg(directory, fname)
  80.             
  81.         
  82.  
  83.     
  84.     def __iter__(self):
  85.         return self.produce()
  86.  
  87.  
  88.  
  89. class HamStream(MsgStream):
  90.     
  91.     def __init__(self, tag, directories, train = 0):
  92.         if train:
  93.             MsgStream.__init__(self, tag, directories, HAMTRAIN)
  94.         else:
  95.             MsgStream.__init__(self, tag, directories, HAMTEST)
  96.  
  97.  
  98.  
  99. class SpamStream(MsgStream):
  100.     
  101.     def __init__(self, tag, directories, train = 0):
  102.         if train:
  103.             MsgStream.__init__(self, tag, directories, SPAMTRAIN)
  104.         else:
  105.             MsgStream.__init__(self, tag, directories, SPAMTEST)
  106.  
  107.  
  108.  
  109. def setparms(hamtrain, spamtrain, hamtest = None, spamtest = None, seed = None):
  110.     '''Set HAMTEST/TRAIN and SPAMTEST/TRAIN.
  111.        If seed is not None, also set SEED.
  112.        If (ham|spam)test are not set, set to the same as the (ham|spam)train
  113.        numbers (backwards compat option).
  114.     '''
  115.     global HAMTRAIN, SPAMTRAIN, HAMTEST, HAMTEST, SPAMTEST, SPAMTEST, SEED
  116.     HAMTRAIN = hamtrain
  117.     SPAMTRAIN = spamtrain
  118.     if hamtest is None:
  119.         HAMTEST = HAMTRAIN
  120.     else:
  121.         HAMTEST = hamtest
  122.     if spamtest is None:
  123.         SPAMTEST = SPAMTRAIN
  124.     else:
  125.         SPAMTEST = spamtest
  126.     if seed is not None:
  127.         SEED = seed
  128.     
  129.  
  130.